#!/usr/bin/env python
# encoding: utf-8
"""
#-------------------------------------------------------------------#
#                   CONFIDENTIAL --- CUSTOM STUDIOS                 #     
#-------------------------------------------------------------------#
#                                                                   #
#                   @Project Name : Globallawonline                #
#                                                                   #
#                   @File Name    : checkmdlaw.py                      #
#                                                                   #
#                   @Programmer   : 李建                            #
#                                                                   #  
#                   @Start Date   : 2021/6/18 10:04                 #
#                                                                   #
#                   @Last Update  : 2021/6/18 10:04                 #
#                                                                   #
#-------------------------------------------------------------------#
# Classes:检查西政老师提供的缅甸国家的excel记录与pdf文件数量是否相同                                                          #
#                                                                   #
#-------------------------------------------------------------------#
"""
import os
import shutil

from dealpdf import PdfFunction
import hashlib


def read_excel_fg(excel_path, sheet_name, pdf_dir,
                  out_root=r"E:\工作记录\工作内容\“一带一路”项目\各国专家提供的法律资源资料\缅甸\西政老师提供\导入\Law",
                  result_path=r'D:\GlobalLawFiles\合作数据导入\法律数据\缅甸法律.xlsx'):
    """
    Process the Myanmar law summary workbook and cut each listed law out of its source PDF.

    For every row of the sheet the function reads, by position, column 1 (law
    name), column 12 (source PDF file name), column 13 (start page) and
    column 14 (end page). It looks the PDF up under ``pdf_dir``, passes it to
    ``PdfFunction().pdf_splitter`` together with the page range and an output
    path of the form ``<out_root>/<website>/f<md5>.pdf``, and records the
    result in five columns appended to the sheet. Rows whose PDF cannot be
    found keep empty strings in those columns. The augmented sheet is finally
    written to ``result_path``.

    :param excel_path: full path of the Excel workbook
    :param sheet_name: name of the worksheet to read
    :param pdf_dir: directory tree that contains the source PDF files
    :param out_root: directory under which the per-website output folders are
        created (defaults to the original hard-coded location)
    :param result_path: path of the workbook written at the end (defaults to
        the original hard-coded location)
    :return: None
    :raises ValueError: if a start/end page cell cannot be converted to int
    :raises IndexError: if the directory of a matched PDF has fewer than nine
        backslash-separated components
    """
    import pandas as pd

    data = pd.read_excel(excel_path, sheet_name=sheet_name)

    # Result columns: website, file name, hashed string, SYSID, database file name.
    result_columns = ['website', '文件名', '哈希值计算', 'SYSID', '数据库文件名']
    for column in result_columns:
        data[column] = ""
    # Resolve the positions from the names instead of assuming a fixed sheet
    # width, so a sheet with more or fewer columns is still filled correctly.
    column_positions = [data.columns.get_loc(column) for column in result_columns]

    # Index the PDF tree once instead of walking it again for every row.
    # setdefault keeps the first directory seen in bottom-up walk order, which
    # is the directory a per-row walk with an early break would have picked.
    pdf_roots = {}
    for root, _dirs, files in os.walk(pdf_dir, topdown=False):
        for name in files:
            pdf_roots.setdefault(name, root)

    for i in range(len(data)):
        legname = str(data.iloc[i, 1])
        raw_name = str(data.iloc[i, 12])
        start_page = int(data.iloc[i, 13])
        end_page = int(data.iloc[i, 14])

        # Strip trailing spaces/newlines and make sure the name carries ".pdf".
        file = raw_name.rstrip(' \n')
        if '.pdf' not in raw_name:
            file += '.pdf'

        root = pdf_roots.get(file)
        if root is None:
            # No such PDF below pdf_dir: leave the result columns empty.
            continue

        # NOTE(review): assumes Windows-style paths in which the ninth
        # component is the website folder -- confirm for other pdf_dir depths.
        website = root.split('\\')[8]
        file_path = os.path.join(root, file)
        # There is no separator between the base name and the start page. The
        # format is kept as is because SYSID is the MD5 of this exact string.
        filename = os.path.splitext(file)[0] + "%s_%s_%s.pdf" % (start_page, end_page, legname)
        SYSID = hashlib.md5(filename.encode('utf-8')).hexdigest()
        SYS_FLD_DIGITFILENAME = 'f' + SYSID + '.pdf'

        out_dir = os.path.join(out_root, website)
        # Create the target folder up front, as read_excel does.
        os.makedirs(out_dir, exist_ok=True)
        out_path = os.path.join(out_dir, SYS_FLD_DIGITFILENAME)
        # Presumably writes pages start_page..end_page of file_path to
        # out_path -- confirm against dealpdf.PdfFunction.
        PdfFunction().pdf_splitter(file_path, start_page, end_page, out_path)

        values = (website, file, filename, SYSID, SYS_FLD_DIGITFILENAME)
        for position, value in zip(column_positions, values):
            data.iloc[i, position] = value

    data.to_excel(result_path)

def read_excel(excel_path, sheet_name, pdf_dir,
               out_root=r"E:\工作记录\工作内容\“一带一路”项目\各国专家提供的法律资源资料\缅甸\西政老师提供\导入\Law",
               result_path=r'D:\GlobalLawFiles\合作数据导入\法律数据\缅甸法律—导出.xlsx'):
    """
    Process the Myanmar law summary workbook and copy each listed PDF under its hashed name.

    For every row of the sheet the function reads, by position, column 1 (law
    name) and column 7 (PDF file name). It looks the PDF up under ``pdf_dir``
    and copies it to ``<out_root>/<website>/f<md5>.pdf``, where the MD5 is
    taken over ``<base name>_<law name>.pdf``. The result is recorded in five
    columns appended to the sheet; rows whose PDF cannot be found keep empty
    strings there. A failed copy is appended to ``copyerro.txt`` and printed,
    and the row is still recorded. The augmented sheet is finally written to
    ``result_path``.

    :param excel_path: full path of the Excel workbook
    :param sheet_name: name of the worksheet to read
    :param pdf_dir: directory tree that contains the source PDF files
    :param out_root: directory under which the per-website output folders are
        created (defaults to the original hard-coded location)
    :param result_path: path of the workbook written at the end (defaults to
        the original hard-coded location)
    :return: None
    :raises IndexError: if the directory of a matched PDF has fewer than nine
        backslash-separated components
    """
    import pandas as pd

    data = pd.read_excel(excel_path, sheet_name=sheet_name)

    # Result columns: website, file name, hashed string, SYSID, database file name.
    result_columns = ['website', '文件名', '哈希值计算', 'SYSID', '数据库文件名']
    for column in result_columns:
        data[column] = ""
    # Resolve the positions from the names instead of assuming a fixed sheet
    # width, so a sheet with more or fewer columns is still filled correctly.
    column_positions = [data.columns.get_loc(column) for column in result_columns]

    # Index the PDF tree once instead of walking it again for every row.
    # setdefault keeps the first directory seen in bottom-up walk order, which
    # is the directory a per-row walk with an early break would have picked.
    pdf_roots = {}
    for root, _dirs, files in os.walk(pdf_dir, topdown=False):
        for name in files:
            pdf_roots.setdefault(name, root)

    for i in range(len(data)):
        legname = str(data.iloc[i, 1])
        raw_name = str(data.iloc[i, 7])

        # Strip trailing spaces/newlines and make sure the name carries ".pdf".
        file = raw_name.rstrip(' \n')
        if '.pdf' not in raw_name:
            file += '.pdf'

        root = pdf_roots.get(file)
        if root is None:
            # No such PDF below pdf_dir: leave the result columns empty.
            continue

        # NOTE(review): assumes Windows-style paths in which the ninth
        # component is the website folder -- confirm for other pdf_dir depths.
        website = root.split('\\')[8]
        file_path = os.path.join(root, file)
        filename = os.path.splitext(file)[0] + "_%s.pdf" % legname
        SYSID = hashlib.md5(filename.encode('utf-8')).hexdigest()
        SYS_FLD_DIGITFILENAME = 'f' + SYSID + '.pdf'

        out_dir = os.path.join(out_root, website)
        out_path = os.path.join(out_dir, SYS_FLD_DIGITFILENAME)
        os.makedirs(out_dir, exist_ok=True)
        try:
            shutil.copyfile(file_path, out_path)
        except OSError as e:
            # Best effort: log the failing source path and carry on.
            with open("copyerro.txt", "a", encoding='utf-8') as f:
                f.write(file_path + '\n')
            print("文件复制出错:%s(%s)" % (file_path, str(e)))

        # The row is recorded even when the copy failed.
        values = (website, file, filename, SYSID, SYS_FLD_DIGITFILENAME)
        for position, value in zip(column_positions, values):
            data.iloc[i, position] = value

    data.to_excel(result_path)


# Inputs of the batch run: the workbook to process, the worksheet to read
# ("shhetname" is kept as spelt because it is a module-level name) and the
# directory tree holding the source PDF files.
excel_path = r'E:\工作记录\工作内容\“一带一路”项目\各国专家提供的法律资源资料\缅甸\西政老师提供\缅甸法律汇总\国际条约—处理.xls'
shhetname = "example"
pdf_dir = r"E:\工作记录\工作内容\“一带一路”项目\各国专家提供的法律资源资料\缅甸\西政老师提供\International Laws"

# Run the job only when the file is executed as a script, so that importing
# the module does not start copying files.
if __name__ == "__main__":
    read_excel(excel_path, shhetname, pdf_dir)

